
Random.seed!(12345)
println("loading prices ... ")
Pij = CSV.read("$datapath/P$year.csv",Tables.matrix, header=false) ./ 1e6
print("loading data from .mat file: year $year ... ")
MCMC =  matread(datapath*"/export_to_julia_$(year).mat")["MCMC$year"]
println("done.")
MCMC["Pij"] = Pij

#save "data" outcomes for figures
df_dataoutcomes = DataFrame(ind=1:size(MCMC["Type0"],1), 
    place = Int.(vec(MCMC["placementIndex"])), enroll=vec(Int.(MCMC["enrollmentIndex"])),
    graduate = vec(Int.(MCMC["grad6"]))
)
df_dataoutcomes.year = [year for t=1:nrow(df_dataoutcomes)]
CSV.write(outputpath*"/dataoutcomes$(year).csv",df_dataoutcomes)

#save covariates for constructing figures etc
df_covariates = DataFrame(ind=1:size(MCMC["Type0"],1),type=vec(MCMC["Type0"]),
    mate=MCMC["Scores"][:,1] .* 100 .+ 300,
    leng=MCMC["Scores"][:,2] .* 100 .+ 300)
CSV.write(outputpath*"/covariates$(year).csv",df_covariates)
df_covariates = nothing

#save program characteristics for figures etc
df_programs = let
    institutionID = vec(Int.(MCMC["TypeIndex"][:,3]))
    fieldID = MCMC["TypeIndex"][:,1]
    Teaching = Int.(fieldID .== 7)
    G8 = institutionID .> 25
    DataFrame(programIndexThisYear = 1:length(G8), programID = Int.(MCMC["TypeIndex"][:,4]), institutionID = institutionID, G8 = G8, fieldID = fieldID, isTeaching = fieldID .== 7, year = [year for t=1:length(G8)])
end
CSV.write(outputpath*"/programs$(year).csv",df_programs)


mysample = findall([rand() < _sample_fraction for ii=1:Int(MCMC["I"])])
constants = setupData(MCMC,year,mysample,model,computeSimulatedOutcomes || getROLs) #only spend time constructing X's if want to run simulations. Don't need if just making figures.
if getROLs || computeSimulatedOutcomes || doExtraCounterfactuals || compare_spda_cpda
    cleanEnrollment!(constants)
    lv, temps = getLatentVars(constants)
    dataset[year] = (constants,lv,temps)
else
    dataset[year] = (constants,)
end

#get constants!
cf_constants = let
    priorities  = Array{Int,2}(MCMC["Priorities"])[mysample,:]
    eligible = Bool.(MCMC["EligibleToApply"])[mysample,:]
    (II,J) = size(priorities)
    _p = priorities .* Array{Bool,2}(eligible)
    priorityEligible = cat(_p,ones(Int,II),dims=2)
    _seats = MCMC["seats"]
    _seats[isnan.(_seats)] .= 0
    seats = vec(Int.(ceil.(_seats .* _sample_fraction)))
    _sobrecupo = MCMC["sobrecupo"]
    _sobrecupo[isnan.(_sobrecupo)] .= 0
    sobrecupo  = vec(Int.(ceil.(_sobrecupo .* _sample_fraction)))
    institutionID = vec(Int.(MCMC["TypeIndex"][:,3]))
    G8 = institutionID .> 25
    typeIndex = Int.(MCMC["TypeIndex"])
    scores=Float64.(MCMC["Scores"][mysample,:])
    placementIndex = Int.(MCMC["placementIndex"][mysample])
    @assert 8 == length(unique(institutionID[G8]))
    cf_constants = (priorityEligible = priorityEligible,
                    seats=seats, sobrecupo=sobrecupo,
                    institutionID=institutionID, G8=G8,
                    typeIndex=typeIndex,
                    scores=scores,
                    placementIndex=placementIndex)
end

if getROLs || computeSimulatedOutcomes || doExtraCounterfactuals || compare_spda_cpda
    cf_temps = let
        (J,I) = size(lv.U)
        Jtilde = J+1
        alphadraws = rand(Jtilde,I)
        alphadraws[Jtilde,:] .= 1 #2nd oo is never unavailable
        Ufull = zeros(Jtilde,I)
        (proposalcount=zeros(Int,Jtilde),point_at=zeros(Int,I),
        enroll = zeros(Int,Jtilde), U_pointed=zeros(I),Ufull=Ufull,
        programPrefs=zeros(Int,I,Jtilde), programROL=zeros(Int,I,Jtilde),
        alphadraws = alphadraws,
        Xoutcome = similar(temps.Xoutcome),
        )
    end
    counterfactuals[year] = (cf_constants,cf_temps)
else
    counterfactuals[year] = (cf_constants,)
end


Random.seed!(1234)
if compare_spda_cpda && (year==2012) #check uniqueness under observed ROL
    (JJ,II) = size(lv.U)
    lv.U .= -1
    lv.U0 .= [0.0; -2.0]
    RR = size(MCMC["Apps"],2)
    for ii=1:II
        for rr=1:RR
            isnan(MCMC["Apps"][ii,rr]) && break
            jj = Int(MCMC["Apps"][ii,rr])
            lv.U[jj,ii] = 1/rr
        end
    end
    theta = nothing
    cc = dataset[year][1]
    cpda_obs_alt,spda_obs_alt = compareAlgorithms(cc,lv,cf_constants,cf_temps,theta,true) #170 differ
    #generate strict priorities via random tiebreak instead of chilean tiebreak rules
    cfc = (cf_constants..., priorityEligible=cf_constants.priorityEligible .+ 0.0)
    cfc.priorityEligible .+= (x-> ((x==0) ? x : x + rand())).(cfc.priorityEligible)
    cft = (cf_temps..., programPrefs = cf_temps.programPrefs .+ 0.0)
    cpda_obs,spda_obs = compareAlgorithms(cc,lv,cfc,cft,theta,true) #6 differ
end


if (year==2012) && compare_spda_cpda
    df_2012 = DataFrame(year=[],iter=[],share=[],count=[])
    #load theta
    for t in save_lv
        println("loading cf preference draw $t")
        f = matopen(outputpath*"/params_$t.mat")
                    _theta_t = MAT.read(f)
                close(f)
        _keys = (collect(keys(_theta_t))...,)
        _vals = ([_theta_t[string(k)] for k in _keys]...,)
        theta = NamedTuple{Symbol.(_keys)}(_vals)
        #load lv
        fname_lv = outputpath*"/latentvariables2012_$t.mat"
        JLD2.@load(fname_lv,lv_out)
        cc,lv,temps = dataset[year]
        for field in keys(lv)
            lv[field] .= lv_out[field]
        end
        #generate strict priorities via random tiebreak instead of chilean tiebreak rules
        cfc = (cf_constants..., priorityEligible=cf_constants.priorityEligible .+ 0.0)
        cfc.priorityEligible .+= (x-> ((x==0) ? x : x + rand())).(cfc.priorityEligible)
        cft = (cf_temps..., programPrefs = cf_temps.programPrefs .+ 0.0)
        cpda,spda = compareAlgorithms(cc,lv,cfc,cft,theta)
        println("2012 counterfactuals iter $t: dif = $(sum(cpda .!= spda))")
        push!(df_2012,Dict(:year=>2012,:iter=>t,
            :share=>mean(cpda .!= spda), :count=>sum(cpda .!= spda)))
    end
    CSV.write(outputpath*"/compare_spda_cpda_conditional_$year.csv",df_2012)
end


if getROLs
    df_share_different = DataFrame(year=[],iter=[],share=[])
    for (r,t) in enumerate(save_lv)
        println("simulating ROLs: year $year, iter $t")
        f = matopen(outputpath*"/params_$t.mat")
                _theta_t = MAT.read(f)
            close(f)
        _keys = (collect(keys(_theta_t))...,)
        _vals = ([_theta_t[string(k)] for k in _keys]...,)
        theta_t = NamedTuple{Symbol.(_keys)}(_vals)
        cc = dataset[year][1]
        lv = dataset[year][2]
        temps = dataset[year][3]
        simulateROLs!(cc,lv,theta_t,temps) #get lv.U, lv.U0
        #generate strict priorities via random tiebreak instead of chilean tiebreak rules
        cfc = (cf_constants..., priorityEligible=cf_constants.priorityEligible .+ 0.0)
        cfc.priorityEligible .+= (x-> ((x==0) ? x : x +rand())).(cfc.priorityEligible)
        cft = (cf_temps..., programPrefs = cf_temps.programPrefs .+ 0.0)
        cpda,spda = compareAlgorithms(cc,lv,cfc,cft,theta_t) #compareAlgorithms(cc,lv,cf_constants,cf_temps,theta_t)
        push!(df_share_different,Dict(:year=>year,:iter=>r,:share=>mean(cpda .!= spda)))
        println("cpda != spda: $(100*mean(cpda.!=spda)) pct")
        saveROLs || continue
        #store ROL data
        JJ,II = size(lv.U)
        rol = [zeros(Int,JJ) for s = 1:Threads.nthreads()]
        apps = fill(0,JJ,II)
        Threads.@threads for ii=1:size(lv.U,2)
            _getApp!(rol[Threads.threadid()],view(lv.U,:,ii),lv.U0[1,ii])
            for (n,jj) in enumerate(rol[Threads.threadid()])
                jj==0 && break
                apps[jj,ii] = n
            end
        end
        CSV.write(outputpath*"/simulatedROL$(year)_$t.csv",Tables.table(apps'));
    end
    df_share_different[!,:n] = Int.(round.(df_share_different[!,:share] .* length(cc.sample)))
    CSV.write(outputpath*"/compare_spda_cpda_$year.csv",df_share_different)
end


if computeSimulatedOutcomes
    #load parameters
    history = Any[]
    for t in save_params
        f = matopen(outputpath*"/params_$t.mat")
            _theta_t = MAT.read(f)
        close(f)
        _keys = (collect(keys(_theta_t))...,)
        _vals = ([_theta_t[string(k)] for k in _keys]...,)
        theta_t = NamedTuple{Symbol.(_keys)}(_vals)
        push!(history,theta_t)
    end
    history = typeof(history[1])[history...]
    #simulate!
    c_means = Any[]
    c_individuals = Any[]
    c_byIter = Any[]
    for (r,t) in enumerate(save_lv)
        println("running counterfactuals: iter $t")
        lv_out = dataset[year][2]
        theta_t = history[findfirst(save_params.==t)]
        (df_means, df_individuals, df_byIter) = runSimulation(theta_t,dataset,counterfactuals,lv_out,dataset[year][3],year)
        push!(c_means,df_means)
        df_individuals[!,:draw] = r .* ones(Int,nrow(df_individuals))
        push!(c_individuals,df_individuals)
        push!(c_byIter,df_byIter)
    end
    df = vcat(c_individuals...)
    for col in names(df)
        if eltype(df[!,col]) == Float64
            df[!,col] = round.(df[!,col],digits=4)
        end
    end
    CSV.write(outputpath*"/simulate$(year)_big.csv",df)
elseif describeSimulatedOutcomes || doExtraCounterfactuals
    df = CSV.read(outputpath*"/simulate$(year)_big.csv",DataFrame)
end

# # count apps at UC and PUC
# ar = copy( dataset[year][1].AppsRelevant )
# is_UC_PUC = Set( Int.( unique( MCMC["TypeIndex"][:,4][(MCMC["TypeIndex"][:,3] .== 10) .| (MCMC["TypeIndex"][:,3] .== 11)])))
# for ii=1:size(ar,1)
#     for jj=1:size(ar,2)
#         ar[ii,jj]==0 && break
#         ar[ii,jj] = (ar[ii,jj] in is_UC_PUC)
#     end
# end

# n_UC_PUC = sum(ar,dims=2) #num apps to UC/PUC per person in relevant set
# [sum(n_UC_PUC .> t) for t=1:4]